/*
*APCED - Data preparation for the community cohort analysis
*Started 11/07/2021
*Last worked on: 03/14/2023
*The purpose of this .do file is to prepare the analytical data file for the community cohort analysis
********************************************************************************
*/

* ---- Session setup ----
* Reset the session and drop every macro before anything else runs.
clear all
macro drop _all
* Output settings: wide lines, large scrollback buffer, no paging of output.
set linesize 200
set scrollbufsize 2000000
set more off

* ---- Run-date stamp ----
* currdate: the current date displayed with the %td_CCYY_NN_DD format.
* date: the same text trimmed, with blanks replaced by underscores, so it can
* be used inside the log file name below.
global currdate: display %td_CCYY_NN_DD date(c(current_date), "DMY")
global date = subinstr(trim("$currdate"), " ", "_", .)

* ---- Working directory and log ----
* Close any log left open by an earlier run, move to the source folder,
* then open a date-stamped text log in the sibling output folder.
capture log close
cd "P:\apced\h2o\datastore\Aim2\source"
log using ..\output\dataprep_comm_cohort_$date.log, text replace

*MAIN*
* main: entry point that runs the data-preparation steps in a fixed order.
* Takes no arguments. Each step reads the file saved by an earlier step, so
* the order of the calls below matters.
* ipwra_models is defined in this file but is not called from here.
capture program drop main
program define main

	* Run programs.do with its output suppressed.
	* NOTE(review): programs.do is not visible here - presumably it defines helper programs; confirm.
	quietly do ..\source\programs.do
	* Build the cohort file with county of residence; saves adrd_ffs_cohort_comm.dta
	clean_comm_cohort
	* NOTE(review): rurality_prep.do is not visible here - presumably it creates ahrf.dta, which the next step merges; confirm.
	do ../source/rurality_prep.do
	* Merge ahrf.dta and the outcomes file onto the cohort; saves merge_comm_outcomes.dta
	merge_rurality_outcomes
	* Merge the zip-code SDI and share files and derive covariates; saves aim2_comm_af.dta
	zip_code_chars
	* Add the volume and ER indicators; saves semifinal_comm_af.dta
	alt_model_clean
	* NOTE(review): eststo is not defined in this file - presumably the estout package command that clears stored estimates; confirm.
	eststo clear
	* Merge snf_bed and hha from ahrf_long.dta by county and year; saves final_comm_af.dta
	cnty_mkt_vars //AHRF_LONG DATA!!
	* Close the log opened at the top of this file
	log close

end
**********

* data_clean: operates on the data currently in memory. Takes no arguments.
*   1. Drops records with a missing residence county (res_cnty).
*   2. Builds one indicator per category of race and of res_rurality and
*      gives the indicators readable names.
* The k-th indicator produced by tabulate is renamed to the k-th name in the
* lists below.
* NOTE(review): this assumes race tabulates into categories ordered
* white, black, hispanic, other and res_rurality into not metro/micro,
* metro, micro - confirm against the coding of those variables.
capture program drop data_clean
program define data_clean

	drop if missing(res_cnty)

	* ---- Race indicators ----
	capture drop race_*
	tabulate race, generate(race_)
	local race_names white black hispanic other_race
	* Remove any earlier copies of the target names first, then rename.
	foreach nm of local race_names {
		capture drop `nm'
	}
	local k = 0
	foreach nm of local race_names {
		local ++k
		rename race_`k' `nm'
	}

	* ---- Rurality indicators ----
	tabulate res_rurality, generate(res_)
	local rural_names not_metro_micro metro micro
	foreach nm of local rural_names {
		capture drop `nm'
	}
	local k = 0
	foreach nm of local rural_names {
		local ++k
		rename res_`k' `nm'
	}

end
**********

* clean_comm_cohort: builds the cohort file with county of residence attached.
* Takes no arguments.
*   Writes bene_res_cnty.dta (beneficiary id and res_cnty only) and
*   adrd_ffs_cohort_comm.dta (records with non-missing apc_involve and
*   cohort equal to 1 that matched the county lookup, after data_clean).
capture program drop clean_comm_cohort
program define clean_comm_cohort

	* Step 1: beneficiary-to-county lookup, loading only the two variables needed.
	use bene_id_18900 res_cnty using "P:\apced\shared\Aim1\data_analysis\bene_level_ALL.dta", clear
	save ..\source\bene_res_cnty.dta, replace

	* Step 2: restrict the full cohort file.
	use "P:\apced\shared\Aim2\source\adrd_ffs_cohort_all.dta", clear
	keep if apc_involve != . & cohort == 1

	* Step 3: attach county of residence and keep matched records only.
	capture drop _merge
	merge 1:1 bene_id_18900 using ..\source\bene_res_cnty.dta
	drop if _merge != 3
	drop _merge

	* Step 4: shared cleaning (county filter, race and rurality indicators), then save.
	data_clean
	save "P:\apced\shared\Aim2\source\adrd_ffs_cohort_comm.dta", replace

end
**********


* merge_rurality_outcomes: adds county-level variables from ahrf.dta and the
* outcome variables to the cohort file, then saves merge_comm_outcomes.dta.
* Takes no arguments. Only records matched in both merges are kept;
* outcome_merge_ind stores the result code of the outcomes merge.
capture program drop merge_rurality_outcomes
program define merge_rurality_outcomes

	use "P:\apced\shared\Aim2\source\adrd_ffs_cohort_comm.dta", clear

	* County-level merge on res_cnty (many cohort records per county).
	capture drop _merge
	merge m:1 res_cnty using ../source/ahrf.dta
	drop if _merge != 3
	drop _merge

	* Outcomes merge, one record per beneficiary.
	merge 1:1 bene_id_18900 using "P:\apced\shared\Data retrieval\decd_cohort_outcomes_new.dta"
	tabulate _merge
	capture drop outcome_merge_ind
	generate outcome_merge_ind = _merge
	drop _merge
	drop if outcome_merge_ind != 3

	* Outcomes multiplied by 100 (30-day hospitalization and hospice death).
	foreach outcome in hosptl_30day hospice_death {
		capture drop `outcome'100
		generate `outcome'100 = `outcome' * 100
	}

	save ..\source\merge_comm_outcomes.dta, replace

end
**********

* zip_code_chars: attaches zip-code level characteristics to the community
* cohort and derives analysis covariates. Takes no arguments.
*   Reads : zipcode_sdi.dta, zipyear_share_long.dta, merge_comm_outcomes.dta
*   Writes: aim2_comm_af.dta
*   Erases: merge_comm_outcomes.dta (and merge_comm_outcomes_zip.dta if present)
* Only records matched to BOTH zip-code files are kept.
* Changes relative to the earlier version of this program:
*   - log messages name the community cohort and the correct zip-code file;
*   - share-file rows with a missing year are no longer kept by the year filter;
*   - apc_involve_num is created even when apc_involve is already numeric;
*   - records with missing race or zip_sdi are dropped with missing(), and a
*     note is shown instead of silently ignoring a failed drop;
*   - user-written packages are installed only when not already available.
capture program drop zip_code_chars
program define zip_code_chars

	* ---- Stage the two zip-code files as tempfiles ----
	use "P:\apced\shared\Data retrieval\zipcode_sdi.dta", clear
	tempfile zippy_sdi
	save `zippy_sdi'

	use "P:\apced\shared\Data retrieval\zipyear_share_long.dta", clear
	* Missing values sort above every number, so year >= 2016 alone would
	* also keep rows whose year is missing.
	keep if year >= 2016 & !missing(year)
	tempfile zippy_share
	save `zippy_share'

	use "..\source\merge_comm_outcomes.dta", clear

	* ---- Merge 1: zip-code SDI (by hkzip) ----
	capture drop _merge
	merge m:1 hkzip using `zippy_sdi'
	capture drop zip_sdi_comm_merge
	gen zip_sdi_comm_merge = _merge
	drop _merge
	forvalues i = 1/3 {
		count if zip_sdi_comm_merge == `i'
		local merge_`i' = `r(N)'
	}

	display "The number of records from the community cohort file that did not find a match with the zip code SDI file is " `merge_1' " records."
	display ""
	display "The number of records from the zip code SDI file that did not find a match with the community cohort file is " `merge_2' " records."
	display ""
	display "The number of records that found a match between the community cohort file and the zip code SDI file was " `merge_3' " records."
	display ""

	* ---- Merge 2: zip-code share file (by hkzip and year) ----
	capture drop _merge
	merge m:1 hkzip year using `zippy_share'
	capture drop zip_share_comm_merge
	gen zip_share_comm_merge = _merge
	drop _merge
	forvalues i = 1/3 {
		count if zip_share_comm_merge == `i'
		local merge_`i' = `r(N)'
	}

	display "The number of records from the community cohort file that did not find a match with the zip code share file is " `merge_1' " records."
	display ""
	display "The number of records from the zip code share file that did not find a match with the community cohort file is " `merge_2' " records."
	display ""
	display "The number of records that found a match between the community cohort file and the zip code share file was " `merge_3' " records."
	display ""

	* ---- Keep records matched in both merges ----
	quietly count if zip_share_comm_merge == 3 & zip_sdi_comm_merge == 3
	display "We are now working with a sample of " `r(N)' " individual-level records"
	keep if zip_share_comm_merge == 3 & zip_sdi_comm_merge == 3
	gen zip_hmo_share = hmo_share
	gen zip_dual_share = dual_share
	gen zip_sdi = sdi

	* ---- Drop variables that contain only missing values ----
	* Install the user-written packages only when they are not already
	* available, so a normal run does not contact SSC. Installation stays
	* best-effort, as before; missings dropvars below fails if it is absent.
	foreach pkg in mdesc missings {
		capture which `pkg'
		if _rc {
			capture ssc install `pkg'
		}
	}

	missings dropvars, force //eliminates all variables with all missing values

	* ---- Numeric copy of apc_involve ----
	* destring does nothing when its input is already numeric, which left
	* apc_involve_num uncreated; handle both storage types explicitly.
	capture drop apc_involve_num
	capture confirm string variable apc_involve
	if _rc == 0 {
		destring apc_involve, gen(apc_involve_num)
	}
	else {
		clonevar apc_involve_num = apc_involve
	}

	* ---- Transformed county covariates ----
	foreach var in ahrf_totpop pci medinc {
		capture drop ln_`var'
		gen ln_`var' = log(`var')
	}

	foreach var in ahrf_pophisp10 pov ahrf_hsdip ahrf_college {
		capture drop `var'_10
		gen `var'_10 = `var'/10
	}

	* ---- Drop records with missing race or zip-code SDI ----
	* missing() works for numeric and string variables and also catches
	* extended missing values. A variable that no longer exists is reported
	* rather than silently skipped.
	foreach v in race zip_sdi {
		capture confirm variable `v', exact
		if _rc == 0 {
			drop if missing(`v')
		}
		else {
			display as text "Note: variable `v' not found; no records were dropped for missing `v'."
		}
	}

	capture drop state_group
	egen state_group = group(res_state)

	* ---- Save final dataset and remove intermediate files ----
	save ..\source\aim2_comm_af.dta, replace
	capture erase ..\source\merge_comm_outcomes_zip.dta
	capture erase ..\source\merge_comm_outcomes.dta

end
*********



* alt_model_clean: adds the alternative-model variables to aim2_comm_af.dta
* and saves semifinal_comm_af.dta. Takes no arguments.
*   Creates: high_volume, apc_volume (labelled a_v), ever_base_er,
*            er_above_p75, hosptl_30day100, hospice_death100; year is
*            re-stored as int.
* Changes relative to the earlier version of this program:
*   - Stata treats missing as larger than any number, so the comparisons
*     all_total >= 8, apc_rate >= 0.5, er_count > 0 and er_count >= p75 were
*     true for missing inputs. Each indicator is now left missing when its
*     input is missing.
*     NOTE(review): if a missing all_total or er_count should instead mean
*     zero, recode those inputs before this step.
*   - the stale er_above_p75 is dropped before it is regenerated (the old
*     code dropped er_above_med, a name that is never created here);
*   - the output is saved under the source folder path that cnty_mkt_vars
*     reads it from.
capture program drop alt_model_clean
program define alt_model_clean

	use ..\source\aim2_comm_af.dta, clear
	capture drop _merge
	merge 1:1 bene_id_18900 using "P:\apced\shared\Data retrieval\adrd_ffs_cohort_newvars.dta", keepusing(*_8mo_* *_30d_*) keep(3)

	* ---- Visit volume indicator: 8 or more visits ----
	capture drop high_volume
	gen high_volume = (all_total >= 8) if !missing(all_total)

	* ---- Three-level volume by APC rate variable ----
	capture drop apc_volume
	gen apc_volume = .
	replace apc_volume = 0 if high_volume == 0
	replace apc_volume = 1 if apc_rate < 0.5 & high_volume == 1
	replace apc_volume = 2 if apc_rate >= 0.5 & !missing(apc_rate) & high_volume == 1

	label variable apc_volume "High volume and APC rate"
	capture label drop a_v
	label define a_v 0 "Low volume (< 8 E+M visits)" 1 ">= 8 E+M visits& <50% APC rate" 2 ">= 8 E+M visits& >= 50% APC rate"
	label values apc_volume a_v
	tab apc_volume

	keep if apc_involve != .

	* ---- ER use indicators ----
	capture drop ever_base_er
	gen ever_base_er = (er_count > 0) if !missing(er_count)
	tab ever_base_er

	* er_above_med is still dropped in case it exists in an older file.
	capture drop er_above_med
	capture drop er_above_p75
	sum er_count, detail
	local p75 = r(p75)
	gen er_above_p75 = (er_count >= `p75') if !missing(er_count)
	tab er_above_p75

	*Generate percentage variable for outcomes: 30-day hosp and hospice death:
	capture drop hosptl_30day100
	gen hosptl_30day100 = hosptl_30day * 100
	capture drop hospice_death100
	gen hospice_death100 = hospice_death * 100

	capture drop _merge

	* Re-store year as int under the same name.
	gen int year_int = year
	drop year
	rename year_int year

	* Saved where cnty_mkt_vars loads it from.
	save ..\source\semifinal_comm_af.dta, replace

end

* cnty_mkt_vars: adds the county-by-year variables snf_bed and hha from
* ahrf_long.dta to semifinal_comm_af.dta and saves final_comm_af.dta.
* Takes no arguments. Rows that exist only in the AHRF file are discarded;
* unmatched cohort rows are kept, and _merge stays in the saved file.
* TODO (note carried over from the original code): CHANGE THIS UP!
capture program drop cnty_mkt_vars
program define cnty_mkt_vars

	* ---- County-year lookup ----
	use "P:\apced\shared\Data retrieval\ahrf_long.dta", clear
	* Copy fips into res_cnty so the lookup has the merge key used below.
	capture drop res_cnty
	generate res_cnty = fips
	destring year, replace
	tempfile ahrf_temp
	save `ahrf_temp'

	* ---- Attach to the cohort file ----
	use ..\source\semifinal_comm_af.dta
	merge m:1 res_cnty year using `ahrf_temp', keepusing(snf_bed hha)
	keep if inlist(_merge, 1, 3)
	save final_comm_af.dta, replace

end


*******
* ipwra_models: for each outcome (hosptl_30day100, hospice_death100) fits two
* teffects ipwra models on the data in memory, one with treatment apc_involve
* and one with treatment apc_volume, storing each with eststo.
* Takes no arguments and is not called from main.
* The outcome model and the treatment model use the same covariate list.
* osample() creates violator_inv_<outcome> and violator_vol_<outcome>; after
* the loop, records flagged by violator_inv_hosptl_30day100 are dropped and
* final_comm_af.dta is overwritten.
* The browse step opens the Data Browser, so this is meant for interactive use.
capture program drop ipwra_models
program define ipwra_models

	* Covariates shared by every outcome and treatment equation below.
	local covars i.year i.state_group er_count acute_days age adrd_years female white black hispanic ccw dual_9mo ip_days ltc_days hh_days home_days hospc_days res_rurality ami anem asth atrfib chf chrkid copd depr diab glauc hipfx hyprlip bph hyperten hypothy ischhd osteo arthr stroke schi schiot bipl pvd ulcers psds drug cancer zip_sdi zip_hmo_share snf_bed hha

	foreach outcome of varlist hosptl_30day100 hospice_death100 {

		* Treatment: apc_involve
		eststo, prefix(`outcome'_ipwra_): ///
			teffects ipwra (`outcome' `covars') (apc_involve `covars'), ///
			osample(violator_inv_`outcome')

		* Inspect the records flagged by osample()
		tabulate violator_inv_`outcome'
		browse if violator_inv_`outcome' == 1

		* Treatment: apc_volume
		eststo, prefix(`outcome'_ipwra_): ///
			teffects ipwra (`outcome' `covars') (apc_volume `covars'), ///
			osample(violator_vol_`outcome')

	}

	drop if violator_inv_hosptl_30day100 == 1

	save final_comm_af.dta, replace //Save manually

end

*******

* Run the pipeline: executes the program main defined above.
main
	








